Smart Waste Management: Fill Level Prediction + Route Optimization¶
This notebook bundles:
- Load datasets (bin master list + 24-hour telemetry)
- Train Random Forest model to predict fill level
- Select FULL/OVERFLOW bins from a telemetry snapshot
- Optimize garbage truck collection routes (20 trucks)
- Generate route map
In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
Load Datasets¶
In [2]:
bins_df = pd.read_csv("bins_master.csv")

# Simulated fill levels: one integer percentage in [10, 100) per bin.
# The generator is seeded so the simulated data (and everything derived from
# it further down) is identical on every Restart & Run All; change the seed
# to draw a different simulation.
fill_rng = np.random.default_rng(42)
bins_df['fill_level_pct'] = fill_rng.integers(10, 100, size=len(bins_df))

telemetry_df = pd.read_csv("telemetry_24h.csv")
bins_df.head(), telemetry_df.head()
Out[2]:
( bin_id latitude longitude fill_level_pct
0 HYD0001 17.349816 78.437772 16
1 HYD0002 17.580286 78.465702 13
2 HYD0003 17.492798 78.599092 64
3 HYD0004 17.439463 78.419002 31
4 HYD0005 17.262407 78.604377 19,
timestamp bin_id latitude longitude ultrasonic_cm \
0 2025-01-12 00:00:00 HYD0001 17.349816 78.437772 92.35
1 2025-01-12 00:00:00 HYD0002 17.580286 78.465702 99.20
2 2025-01-12 00:00:00 HYD0003 17.492798 78.599092 101.78
3 2025-01-12 00:00:00 HYD0004 17.439463 78.419002 96.48
4 2025-01-12 00:00:00 HYD0005 17.262407 78.604377 101.25
fill_level_pct status
0 5.86 NOT_FULL
1 2.33 NOT_FULL
2 0.07 NOT_FULL
3 3.68 NOT_FULL
4 1.81 NOT_FULL )
In [3]:
# Drop every row whose bin_id value is literally the string "bin_id",
# then renumber the remaining rows from 0.
is_header_row = bins_df['bin_id'].eq('bin_id')
bins_df = bins_df.loc[~is_header_row].reset_index(drop=True)
bins_df.head()
Out[3]:
| bin_id | latitude | longitude | fill_level_pct | |
|---|---|---|---|---|
| 0 | HYD0001 | 17.349816 | 78.437772 | 16 |
| 1 | HYD0002 | 17.580286 | 78.465702 | 13 |
| 2 | HYD0003 | 17.492798 | 78.599092 | 64 |
| 3 | HYD0004 | 17.439463 | 78.419002 | 31 |
| 4 | HYD0005 | 17.262407 | 78.604377 | 19 |
In [4]:
# Cast both coordinate columns to float in a single step so later
# arithmetic on latitude/longitude works on numeric data.
bins_df = bins_df.astype({'latitude': float, 'longitude': float})
print(bins_df.head())
bin_id latitude longitude fill_level_pct 0 HYD0001 17.349816 78.437772 16 1 HYD0002 17.580286 78.465702 13 2 HYD0003 17.492798 78.599092 64 3 HYD0004 17.439463 78.419002 31 4 HYD0005 17.262407 78.604377 19
Train RandomForest Model to Predict Fill Level¶
In [5]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
# Work on a derived frame so telemetry_df itself is left untouched:
# parse the timestamp, then add hour-of-day and day-of-week columns.
df = telemetry_df.assign(timestamp=pd.to_datetime(telemetry_df["timestamp"]))
df = df.assign(
    hour=df["timestamp"].dt.hour,
    dayofweek=df["timestamp"].dt.dayofweek,
)

feature_cols = ["ultrasonic_cm", "latitude", "longitude", "hour", "dayofweek"]
X, y = df[feature_cols], df["fill_level_pct"]

# Hold out 20% of the readings for evaluation (fixed seed for a stable split).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestRegressor(n_estimators=300, random_state=42, n_jobs=-1).fit(X_train, y_train)
preds = model.predict(X_test)

# Report both hold-out metrics, one per line.
for label, metric in (("MAE:", mean_absolute_error), ("R^2:", r2_score)):
    print(label, metric(y_test, preds))
MAE: 1.262692105555555 R^2: 0.9934184184629896
Route Optimization (CVRP with OR-Tools)¶
In [6]:
# NOTE: Requires: pip install ortools
from math import radians, sin, cos, sqrt, asin
from ortools.constraint_solver import pywrapcp, routing_enums_pb2
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance between two points given in decimal degrees.

    Applies the haversine formula with a sphere radius of 6371 (the mean
    Earth radius in kilometres), so the result is in kilometres. All maths
    goes through NumPy ufuncs, so scalars and broadcastable arrays both work.
    """
    sphere_diameter = 2 * 6371
    phi1, lam1 = np.radians(lat1), np.radians(lon1)
    phi2, lam2 = np.radians(lat2), np.radians(lon2)
    half_dphi = (phi2 - phi1) / 2
    half_dlam = (lam2 - lam1) / 2
    hav = np.sin(half_dphi) ** 2 + np.cos(phi1) * np.cos(phi2) * np.sin(half_dlam) ** 2
    return sphere_diameter * np.arcsin(np.sqrt(hav))
# Select snapshot: readings at one timestamp, restricted to bins that need pickup.
timestamp = "2025-01-12 10:00:00"
snap = telemetry_df[telemetry_df["timestamp"] == timestamp]
snap = snap[snap["status"].isin(["FULL", "OVERFLOW"])]
use_df = snap[["bin_id", "latitude", "longitude", "fill_level_pct"]].drop_duplicates()

# With no qualifying bins the depot below becomes NaN and every route is just
# depot -> depot. Say so explicitly instead of letting that pass unnoticed.
if use_df.empty:
    print(f"WARNING: no FULL/OVERFLOW bins in the snapshot at {timestamp}; "
          "all routes will be empty.")

# Depot = mean location of the bins to be collected
depot_lat = use_df["latitude"].mean()
depot_lon = use_df["longitude"].mean()

# Node 0 is the depot; nodes 1..N-1 are the bins in use_df order.
nodes = pd.concat([
    pd.DataFrame({"bin_id": ["DEPOT"], "latitude": [depot_lat], "longitude": [depot_lon], "fill_level_pct": [0]}),
    use_df
], ignore_index=True)

# Demand = whole percentage points of fill above 80 (never negative; depot is 0).
nodes["demand"] = (nodes["fill_level_pct"] - 80).clip(lower=0).astype(int)

# Distance matrix: haversine is built on NumPy ufuncs, so broadcasting a column
# of coordinates against a row yields all N x N pairwise distances in one call
# instead of N*N Python-level calls.
coords = nodes[["latitude", "longitude"]].to_numpy()
N = len(coords)
node_lat = coords[:, 0]
node_lon = coords[:, 1]
dist_matrix = haversine(node_lat[:, None], node_lon[:, None], node_lat[None, :], node_lon[None, :])

# Routing model: N nodes, num_vehicles trucks, all starting and ending at node 0.
num_vehicles = 20
manager = pywrapcp.RoutingIndexManager(N, num_vehicles, 0)
routing = pywrapcp.RoutingModel(manager)
def distance_callback(from_index, to_index):
    """Integer arc cost between two routing indices.

    Translates both solver indices to node numbers, looks the pair up in
    dist_matrix, scales the value by 1000 and truncates it to an int.
    """
    origin, destination = (manager.IndexToNode(ix) for ix in (from_index, to_index))
    scaled = dist_matrix[origin][destination] * 1000
    return int(scaled)
# Per-node demand list (indexed by node number) and the per-truck capacity.
capacity = 1000
demands = nodes["demand"].to_list()

# Every vehicle uses distance_callback as its arc cost.
transit_callback_index = routing.RegisterTransitCallback(distance_callback)
routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
def demand_callback(from_index):
    """Demand of the node behind a routing index, read from the demands list."""
    node = manager.IndexToNode(from_index)
    return demands[node]
# Capacity dimension: accumulate demand along each route, capped per vehicle.
demand_callback_index = routing.RegisterUnaryTransitCallback(demand_callback)
routing.AddDimensionWithVehicleCapacity(
    demand_callback_index,
    0,                          # slack: none
    [capacity] * num_vehicles,  # one capacity entry per vehicle
    True,                       # start each vehicle's cumulative load at zero
    "Capacity",
)

# Build a first solution greedily, then improve it with guided local search
# until the 10-second limit is reached.
search_params = pywrapcp.DefaultRoutingSearchParameters()
search_params.first_solution_strategy = routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC
search_params.local_search_metaheuristic = routing_enums_pb2.LocalSearchMetaheuristic.GUIDED_LOCAL_SEARCH
search_params.time_limit.FromSeconds(10)
solution = routing.SolveWithParameters(search_params)

# One list of node numbers per vehicle, each starting and ending at the depot.
routes = []
if solution:
    for v in range(num_vehicles):
        index = routing.Start(v)
        route = []
        while not routing.IsEnd(index):
            route.append(manager.IndexToNode(index))
            index = solution.Value(routing.NextVar(index))
        # Close the route with the vehicle's actual end node rather than a literal 0.
        route.append(manager.IndexToNode(index))
        routes.append(route)
else:
    # Without this, a failed solve is indistinguishable from "nothing to collect".
    print("No routing solution found within the time limit; routes is empty.")

routes[:3]  # Show first 3 routes
Out[6]:
[[0, 0], [0, 0], [0, 0]]
Generate Combined Route Map (Folium)¶
In [7]:
# Depot for the map sits at the mean coordinate of all bins.
DEPOT_LAT = bins_df['latitude'].mean()
DEPOT_LON = bins_df['longitude'].mean()
depot = pd.DataFrame({
    'bin_id': ['DEPOT'],
    'latitude': [DEPOT_LAT],
    'longitude': [DEPOT_LON],
    'fill_level_pct': [0],
})

# Keep the existing fill level column, or fall back to 50 when it is absent.
bins_df['fill_level_pct'] = bins_df.get('fill_level_pct', 50)

# Depot first, then every bin; rows lacking a coordinate are removed and the
# index is renumbered afterwards.
node_columns = ['bin_id', 'latitude', 'longitude', 'fill_level_pct']
nodes = (
    pd.concat([depot, bins_df[node_columns]], ignore_index=True)
    .dropna(subset=['latitude', 'longitude'])
    .reset_index(drop=True)
)
In [8]:
# create DEPOT + nodes table
DEPOT_LAT = bins_df['latitude'].mean()
DEPOT_LON = bins_df['longitude'].mean()
depot = pd.DataFrame([{
    'bin_id': 'DEPOT',
    'latitude': DEPOT_LAT,
    'longitude': DEPOT_LON,
    'fill_level_pct': 0
}])
nodes = pd.concat([depot, bins_df[['bin_id','latitude','longitude','fill_level_pct']]], ignore_index=True)

# This cell replaces `nodes` wholesale, so the missing-coordinate cleanup has to
# happen here too: a row without latitude/longitude cannot be placed on the map.
nodes = nodes.dropna(subset=['latitude', 'longitude']).reset_index(drop=True)
In [9]:
import folium
# Center map on city
m = folium.Map(location=[nodes["latitude"].mean(), nodes["longitude"].mean()], zoom_start=11)

# The node numbers stored in `routes` refer to the table the solver was built
# on: DEPOT (depot_lat, depot_lon) first, then the rows of use_df. By this point
# `nodes` holds a different table (depot + every bin in bins_df), so indexing it
# with route node numbers would draw lines through unrelated bins. Rebuild the
# solver's table and take route coordinates from that instead.
route_nodes = pd.concat([
    pd.DataFrame({"bin_id": ["DEPOT"], "latitude": [depot_lat], "longitude": [depot_lon], "fill_level_pct": [0]}),
    use_df,
], ignore_index=True)
route_latlon = route_nodes[["latitude", "longitude"]].to_numpy()

# Draw each vehicle route
colors = ["red","blue","green","purple","orange","darkred","lightred","beige",
          "darkblue","darkgreen","cadetblue","darkpurple","pink","lightblue",
          "lightgreen","gray","black","lightgray","brown","cyan"]
for v, route in enumerate(routes):
    # A route of just [depot, depot] means the vehicle was not used; there is
    # nothing to draw (and the depot may be NaN when no bins were selected).
    if len(route) <= 2:
        continue
    path = route_latlon[route].tolist()
    folium.PolyLine(path, color=colors[v % len(colors)], weight=3, opacity=0.8,
                    tooltip=f"Vehicle {v}").add_to(m)
# Add one circle marker per row of `nodes`, colored by fill level.
# Bands are checked from the highest threshold down; anything under 50 is green.
# (Orange is used for the middle band; yellow is an alternative.)
fill_color_bands = ((80, "red"), (50, "orange"))
marker_columns = ["latitude", "longitude", "fill_level_pct", "bin_id"]
for lat, lon, fill, bid in nodes[marker_columns].itertuples(index=False, name=None):
    color = next((shade for floor, shade in fill_color_bands if fill >= floor), "green")
    folium.CircleMarker(
        (lat, lon),
        radius=5,
        color=color,
        fill=True,
        fill_opacity=0.9,
        popup=f"{bid} | Fill Level: {fill}%",
    ).add_to(m)

# Write the map to disk, then show it inline as the cell's result.
m.save("combined_routes_map_colored.html")
m
Out[9]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]: